Setup

# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)
## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
## 
## $subsetCells
## [1] 500
## 
## $resolution
## [1] 0.6
## 
## $resultsPath
## [1] "./Results"
## 
## $nCores
## [1] 2
## 
## $perplexity
## [1] 30
# load(file.path(resultsPath, "scRNAseq_results.RData"))
resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
load(file.path(resultsPath,"scRNAseq_results.RData"))

Load Libraries

library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr) 
library(data.table) 
library(readxl) 
library(reshape2)
library(ggrepel)
library(plotly)

library(GeneOverlap) # BiocManager::install("GeneOverlap")
library(monocle) # BiocManager::install("monocle") 
# BiocManager::install(c('DelayedArray', 'DelayedMatrixStats', 'org.Hs.eg.db', 'org.Mm.eg.db'))
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
library(enrichR) #BiocManager::install("enrichR")

Identify Cell Types with Garnett + Monocle

Pre-trained PBMC Classifer

# Convert Seurat objt to CDS object 
mDAT <- monocle::importCDS(DAT,  import_all = T) 
# generate size factors for normalization later
mDAT <-  DESeq2::estimateSizeFactors(mDAT)
# Get pre-trained PBMC classifer 
load(file.path(root, "Data/Garnett/hsPBMC")) # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC  

mDAT <- garnett::classify_cells(mDAT, hsPBMC,
                           db = org.Hs.eg.db,
                           cluster_extend = TRUE,
                           cds_gene_id_type = "SYMBOL")
## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
table(pData(mDAT)$cell_type)
## 
##         B cells           CD34+     CD4 T cells     CD8 T cells 
##              13              92               3               1 
## Dendritic cells       Monocytes        NK cells         T cells 
##             280           15025             801              16 
##         Unknown 
##            2913
table(pData(mDAT)$cluster_ext_type) 
## 
##         B cells           CD34+     CD4 T cells Dendritic cells 
##              12              17               3             165 
##       Monocytes        NK cells         T cells         Unknown 
##           17165             701              15            1066
# Get feature genes for each cell type
feature_genes <- garnett::get_feature_genes(classifier = hsPBMC,
                                   node = "root",
                                   db = org.Hs.eg.db,
                                   convert_ids = F)
head(feature_genes) 
##                      B cells       CD34+ Dendritic cells     Monocytes
## (Intercept)     -0.884989922  0.47694427    -0.124629734 -0.0301907756
## ENSG00000196154 -0.005499815 -0.01088859    -0.007978714  0.0056365451
## ENSG00000019582  0.030146634 -0.01959215     0.015232191  0.0001883507
## ENSG00000156738  2.193353690 -0.66191294    -1.104899464 -0.7393680112
## ENSG00000177455  2.056641938 -0.71851810    -1.941859710 -1.0448682467
## ENSG00000105369  1.800296145 -0.53835614     0.063380804 -0.1973728377
##                     NK cells      T cells     Unknown
## (Intercept)     -0.911657164  0.524757370 0.949765956
## ENSG00000196154 -0.001627233 -0.005835858 0.026193667
## ENSG00000019582 -0.005880096 -0.023473024 0.003378099
## ENSG00000156738 -0.096566144 -0.602417257 1.011810124
## ENSG00000177455  0.297279508  0.177790907 1.173533704
## ENSG00000105369 -0.618567693 -0.963141845 0.453761563
# Convert back to Seurat object & save results
DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
write.csv(DAT@meta.data, file.path(resultsPath, "garnett_results.csv"), quote = F,row.names = F)

Biomarker Expression

markerList <- c("CD14", "FCGR3A")
markerData <- DAT@scale.data[row.names(DAT@scale.data) %in% markerList,] %>% t() %>% data.frame()  
## Efficiently merge using data.table
dt1 <- data.table(markerData, keep.rownames = "Cell", key = "Cell") %>% copy()
dt2 <- data.table(DAT@meta.data[c("cell_type","post_clustering")], keep.rownames = "Cell", key = "Cell") %>% copy()
markerDT <- dt1[dt2]
 
ggplot(data = markerDT, aes(x=CD14, y=FCGR3A) ) + 
  stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="purple", bins = 100, size=.1) +
  geom_point(aes(color=as.factor(cell_type)), shape=16, stroke=0, size=2, alpha=.1) + 
  guides(colour = guide_legend(title="cell_type",override.aes = list(alpha = 1))) 

tSNE Metadata Plots

tSNE_metadata_plot <- function(var, labSize=12){ 
  # Metadata plot  
  p1 <- Seurat::TSNEPlot(DAT, do.return = T,  do.label = T,  group.by = var,label.size = labSize,
                 plot.title=paste(var), vector.friendly=T) +
    theme(legend.position = "top") + 
    scale_color_brewer( palette = "Dark2",  aesthetics = aes(alpha=.5)) 
     
  # t-SNE clusters plot
  p2 <- Seurat::TSNEPlot(DAT, do.return = T, do.label = T,label.size = labSize,
                 plot.title=paste("Unsupervised Clusters"), vector.friendly=T)  +
    theme(legend.position = "top") + 
    scale_color_brewer( palette = "Set1", aesthetics = aes(alpha=.5))  
  print(cowplot::plot_grid(p1,p2))
}   

Garnett cell_type

tSNE_metadata_plot("cell_type")

Garnett cluster_ext_type

tSNE_metadata_plot("cluster_ext_type")

mut

tSNE_metadata_plot("mut")

tSNE FeaturePlots

Monocyte Markers

FeaturePlot(DAT,features.plot =  c("CD14","FCGR3A"),  
            cols.use = c("grey","red","blue","purple"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F) 

PD Genes

FeaturePlot(DAT,features.plot = c("LRRK2", "GBA"),  
            cols.use = c("grey","purple","cyan","blue"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F) 

MS4A4A & MS4A6A

FeaturePlot(DAT, features.plot =  c("MS4A4A","MS4A6A"), 
            cols.use = c("grey","red","green","blue"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F)

Identify Cluster-specific Biomarkers

clust0.markers <- FindMarkers(DAT, ident.1=0, min.pct = 0.25, only.pos = F, test.use = "wilcox")
clust1.markers <- FindMarkers(DAT, ident.1=1, min.pct = 0.25, only.pos = F, test.use = "wilcox")

clust0.uniqueMarkers <- clust0.markers[!(row.names(clust0.markers) %in% row.names(clust1.markers)),] %>%
  subset(p_val_adj<=0.05)
clust1.uniqueMarkers <- clust1.markers[!(row.names(clust1.markers) %in% row.names(clust0.markers)),] %>%
  subset(p_val_adj<=0.05)

difference <- abs( length(row.names(clust0.uniqueMarkers)) - length(row.names(clust1.uniqueMarkers) ) )
uniqueMarkers <- data.frame(Cluster0_markers=c(row.names(clust0.uniqueMarkers), rep("",difference) ),
                            Cluster1_markers=row.names(clust1.uniqueMarkers))
write.csv(uniqueMarkers,
          file.path(resultsPath,"unique_cluster_markers.csv"), 
          quote = F, row.names = F)

DGE Across Clusters

DGE Across Clusters: dx

# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control")

DGE Across Clusters: mut

# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

DGE Within Clusters

DGE Within Clusters: dx

DGE_within_clusters(clustDAT,  meta_var = "dx", group1 = "PD", group2 = "control", clusterList = c(0,1))

Cluster 0: PD vs. control

Cluster 1: PD vs. control

DGE Within Clusters: mut

DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", clusterList = c(0,1))

Cluster 0: PD vs. GBA

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

Cluster 1: PD vs. GBA

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text_repel).

DEG Enrichment w/ enrichR

enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
                 "GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018", 
                 "Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
 
geneList <- data.frame(Gene=row.names(MonocyteSubtype.markers), 
     Weight=scales::rescale(length(MonocyteSubtype.markers$p_val_adj):1))

results <- enrichr(genes = geneList, databases = enrichr_dbs ) 

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

for (db in enrichr_dbs){
  cat('\n')
  cat("##",db,"\n")  
  # res <- subset(results[[db]], Adjusted.P.value<=0.05)
  createDT_html(results[[db]], paste("Enrichr Results:",db))
  cat('\n')
}  

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas